from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import PyMuPDFReader
from llamaindex02 import show_json
from llama_index.core.node_parser import TokenTextSplitter

# Load every PDF found under ./data. Only files with a ".pdf" extension are
# accepted, and those are parsed with PyMuPDFReader rather than the reader's
# default extractor for that extension.
reader = SimpleDirectoryReader(
    input_dir="./data",
    required_exts=[".pdf"],
    file_extractor={".pdf": PyMuPDFReader()},
)

documents = reader.load_data()

# Split the documents with a token-based splitter. chunk_overlap is half of
# chunk_size, so consecutive chunks are configured to overlap heavily.
node_parser = TokenTextSplitter(chunk_size=100, chunk_overlap=50)

nodes = node_parser.get_nodes_from_documents(documents, show_progress=True)

# Show up to the first two nodes for inspection. Slicing (instead of indexing
# nodes[0] / nodes[1] directly) avoids an IndexError when the input produces
# fewer than two nodes.
for node in nodes[:2]:
    show_json(node)

